#!/usr/bin/make -rRf

# ntJoin: Scaffold assemblies using reference assemblies and minimizer graphs
# Written by Lauren Coombe @lcoombe
# ntJoin v1.0.2

# Input files
target=None
references=None
min_ref_targets=$(addsuffix .k$(k).w$(w).tsv, $(references))
fai_ref_targets=$(addsuffix .fai, $(references))

# List of edge weights
reference_weights=None
target_weight=1

# Path to minimizer assemble code
assemble_path=$(shell dirname $(realpath $(MAKEFILE_LIST)))

# Window size
w=1000

# Kmer size
k=32

# Number of threads
t=4

# Number of threads for ntjoin_assemble.py (Change with caution -- multi-processing increases memory)
assemble_t=1

# Minimum edge weight
n=1

# Minimum gap size between scaffolds
g=20

# Whether Mann-Kendall test will be run to orient contigs (True) or not (False)
mkt=False

# Output AGP file?
agp=False

# The proportion of increasing/decreasing minimizer positions needed to orient a contig
m=90

# Reference genome (for analysis)
ref=None

# Run Quast in large mode (1) or not (0)
large=0

# Cut the input contigs and putative misassemblies?
no_cut=False

# Prefix for output files
prefix=out.k$(k).w$(w).n$(n)

SHELL=bash -e -o pipefail
ifeq ($(shell zsh -e -o pipefail -c 'true' 2>/dev/null; echo $$?), 0)
# Set pipefail to ensure that all commands of a pipe succeed.
SHELL=zsh -e -o pipefail
# Report run time and memory usage with zsh.
export REPORTTIME=1
export TIMEFMT=time user=%U system=%S elapsed=%E cpu=%P memory=%M job=%J
endif

# Record run time and memory usage in a file using GNU time
ifneq ($(shell command -v gtime),)
time=command gtime -v -o $@.time
else
time=command time -v -o $@.time
endif

# Compress in parallel
ifneq ($(shell command -v pigz),)
gzip=pigz -p$t -f
else
gzip=gzip -f
endif

help:
	@echo ""
	@echo "ntJoin: Scaffolding assemblies using reference assemblies and minimizer graphs"
	@echo "ntJoin v1.0.2"
	@echo "Usage: ntJoin assemble target=<target scaffolds> references='List of reference assemblies' reference_weights='List of weights per reference assembly'"
	@echo ""
	@echo "Options:"
	@echo "target			Target assembly to be scaffolded in fasta format"
	@echo "references		List of reference files (separated by a space, in fasta format)"
	@echo "target_weight		Weight of target assembly [1]"
	@echo "reference_weights	List of weights of reference assemblies"
	@echo "prefix			Prefix of intermediate output files [out.k<k>.w<w>.n<n>]"
	@echo "t			Number of threads [4]"
	@echo "k			K-mer size for minimizers [32]"
	@echo "w			Window size for minimizers (bp) [1000]"
	@echo "n			Minimum graph edge weight [1]"
	@echo "g			Minimum gap size (bp) [20]"
	@echo "m			Minimum percentage of increasing/decreasing minimizer positions to orient contig [90]"
	@echo "mkt			If True, use Mann-Kendall Test to predict contig orientation (computationally-intensive, overrides 'm') [False]"
	@echo "agp			If True, output AGP file describing output scaffolds [False]"
	@echo "no_cut		If True, will not cut contigs at putative misassemblies [False]"
	@echo ""
	@echo "Notes: "
	@echo "	- Ensure the lists of reference assemblies and weights are in the same order, and that both are space-separated"
	@echo "	- Ensure all assembly files are in the current working directory"
	@echo ""

assemble: check_params \
	$(min_ref_targets) \
	$(target).k$(k).w$(w).tsv \
	$(fai_ref_targets) \
	$(target).fai \
	$(target).k$(k).w$(w).n$(n).assigned.scaffolds.fa \
	$(target).k$(k).w$(w).n$(n).unassigned.scaffolds.fa \
	$(target).k$(k).w$(w).n$(n).all.scaffolds.fa

analysis: check_params_analysis \
	$(addsuffix .bam.bai, $(references)) \
	$(target).bam.bai \
	$(target).k$(k).w$(w).n$(n).all.scaffolds.fa.bam.bai

all: check_params assemble analysis

check_params:
ifeq ($(references), None)
	$(error ERROR: Must set references)
endif
ifeq ($(reference_weights), None)
	$(error ERROR: Must set reference_weights)
endif
ifeq ($(target), None)
	$(error ERROR: Must set target)
endif

check_params_analysis:
ifeq ($(ref), None)
	$(error must set ref)
endif

version:
	@echo "ntJoin v1.0.2"
	@echo "Written by Lauren Coombe (lcoombe@bcgsc.ca)"

.PHONY: help all version analysis assemble check_params jupiter
.DELETE_ON_ERROR: $(prefix).n$(n).mx.dot
.SECONDARY:

%.k$(k).w$(w).tsv: %
	$(time) $(assemble_path)/src/indexlr --pos -k $(k) -w $(w) -t $(t) $< > $@

%.fai: %
	$(time) samtools faidx $<

%.fa.gz: %.fa
	$(time) $(gzip) $<
	
$(target).k$(k).w$(w).n$(n).assigned.scaffolds.fa: $(target).k$(k).w$(w).tsv $(min_ref_targets)
ifeq ($(mkt), True)
ifeq ($(agp), True)
ifeq ($(no_cut), True)
	$(time) $(assemble_path)/bin/ntjoin_assemble.py -p $(prefix) -n $(n) -s $< -l $(target_weight) \
	-r "$(reference_weights)" -k $(k) -g $(g) -t $(assemble_t) --mkt --agp --no_cut $(min_ref_targets)
else
	$(time) $(assemble_path)/bin/ntjoin_assemble.py -p $(prefix) -n $(n) -s $< -l $(target_weight) \
	-r "$(reference_weights)" -k $(k) -g $(g) -t $(assemble_t) --mkt --agp $(min_ref_targets)
endif
else
ifeq ($(no_cut), True)
	$(time) $(assemble_path)/bin/ntjoin_assemble.py -p $(prefix) -n $(n) -s $< -l $(target_weight) \
	-r "$(reference_weights)" -k $(k) -g $(g) -t $(assemble_t) --mkt --no_cut $(min_ref_targets)
else
	$(time) $(assemble_path)/bin/ntjoin_assemble.py -p $(prefix) -n $(n) -s $< -l $(target_weight) \
	-r "$(reference_weights)" -k $(k) -g $(g) -t $(assemble_t) --mkt $(min_ref_targets)
endif
endif
else
ifeq ($(agp), True)
ifeq ($(no_cut), True)
	$(time) $(assemble_path)/bin/ntjoin_assemble.py -p $(prefix) -n $(n) -s $< -l $(target_weight) \
	-r "$(reference_weights)" -k $(k) -g $(g) -t $(assemble_t) -m $(m) --agp --no_cut $(min_ref_targets)
else
	$(time) $(assemble_path)/bin/ntjoin_assemble.py -p $(prefix) -n $(n) -s $< -l $(target_weight) \
	-r "$(reference_weights)" -k $(k) -g $(g) -t $(assemble_t) -m $(m) --agp $(min_ref_targets)
endif
else
ifeq ($(no_cut), True)
	$(time) $(assemble_path)/bin/ntjoin_assemble.py -p $(prefix) -n $(n) -s $< -l $(target_weight) \
	-r "$(reference_weights)" -k $(k) -g $(g) -t $(assemble_t) -m $(m) --no_cut $(min_ref_targets)
else
	$(time) $(assemble_path)/bin/ntjoin_assemble.py -p $(prefix) -n $(n) -s $< -l $(target_weight) \
	-r "$(reference_weights)" -k $(k) -g $(g) -t $(assemble_t) -m $(m) $(min_ref_targets)
endif
endif
endif

$(target).k$(k).w$(w).n$(n).unassigned.scaffolds.fa: $(target).k$(k).w$(w).n$(n).assigned.scaffolds.fa
	touch $@

%.all.scaffolds.fa: %.assigned.scaffolds.fa %.unassigned.scaffolds.fa
	$(time) cat $^ > $@

%.fa.bam: %.fa
	minimap2 -a -x asm5 -r100000 -t $(t) $(ref) $< |samtools view -b |samtools sort -o $@

%.bam.bai: %.bam
	samtools index $<

# Run QUAST
quast_$(prefix)/report.tsv: $(references) $(target) $(target).k$(k).w$(w).n$(n).all.scaffolds.fa
ifeq ($(large), 1)
	quast -t $(t) -o quast_$(prefix) -r $(ref) --fast --scaffold-gap-max-size 100000 --split-scaffolds \
	--large $^
else
	quast -t $(t) -o quast_$(prefix) -r $(ref) --fast --scaffold-gap-max-size 100000 --split-scaffolds \
	$^
endif